library(tidyverse)
library(lubridate)

# Lab 7: Data Tidying, Transformation and Visualization with COVID-19 reporting data

# Loading data from a github repository ----

# Make sure the local data directory exists before downloading into it.
if (!dir.exists("data")) {
  dir.create("data")
}

# Fetch the global confirmed-case time series (CSV) into the data directory.
download.file(
  url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
  destfile = "data/time_series_covid19_confirmed_global.csv"
)

# Read the wide table and replace the two slash-containing column names with
# syntactic ones so they can be used without quoting.
time_series_confirmed <- read_csv("data/time_series_covid19_confirmed_global.csv") |>
  rename(Province_State = "Province/State", Country_Region = "Country/Region")

# Data Tidying - Pivoting ----
# Reshape from one column per reporting date to one row per location and date.
# Every column other than the four identifier columns is treated as a date.
time_series_confirmed_long <- time_series_confirmed |>
  pivot_longer(
    -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Confirmed"
  )

# Dates and time ----

# The former column headers are month/day/year strings; convert them to Date.
time_series_confirmed_long$Date <- mdy(time_series_confirmed_long$Date)

# Making Graphs from the time series data ----
# Confirmed cases for the US, summed over all rows reported for each date.
time_series_confirmed_long |>
  group_by(Country_Region, Date) |>
  summarise(Confirmed = sum(Confirmed), .groups = "drop") |>
  filter(Country_Region == "US") |>
  ggplot(aes(x = Date, y = Confirmed)) +
  geom_point() +
  geom_line() +
  ggtitle("US COVID-19 Confirmed Cases")

# The same per-country totals for several countries, one colour per country.
time_series_confirmed_long |>
  group_by(Country_Region, Date) |>
  summarise(Confirmed = sum(Confirmed), .groups = "drop") |>
  filter(
    Country_Region %in% c("China", "France", "Italy", "Korea, South", "US")
  ) |>
  ggplot(aes(x = Date, y = Confirmed, color = Country_Region)) +
  geom_point() +
  geom_line() +
  ggtitle("COVID-19 Confirmed Cases")

# Daily change = today's country total minus the previous day's total.
# The lag must be taken within each country: on an ungrouped frame the first
# day of every country would subtract the last total of the preceding country.
# Each country's first day gets 0 through `default = first(Confirmed)`.
time_series_confirmed_long_daily <- time_series_confirmed_long |>
  group_by(Country_Region, Date) |>
  summarise(Confirmed = sum(Confirmed), .groups = "drop") |>
  arrange(Country_Region, Date) |>
  group_by(Country_Region) |>
  mutate(Daily = Confirmed - lag(Confirmed, default = first(Confirmed))) |>
  ungroup()

# US daily change as points.
time_series_confirmed_long_daily |>
  filter(Country_Region == "US") |>
  ggplot(aes(x = Date, y = Daily, color = Country_Region)) +
  geom_point() +
  ggtitle("COVID-19 Confirmed Cases")

# US daily change as a line.
time_series_confirmed_long_daily |>
  filter(Country_Region == "US") |>
  ggplot(aes(x = Date, y = Daily, color = Country_Region)) +
  geom_line() +
  ggtitle("COVID-19 Confirmed Cases")

# US daily change as a smoothed curve (geom_smooth() defaults).
time_series_confirmed_long_daily |>
  filter(Country_Region == "US") |>
  ggplot(aes(x = Date, y = Daily, color = Country_Region)) +
  geom_smooth() +
  ggtitle("COVID-19 Confirmed Cases")

# US daily change smoothed with a GAM and no confidence band.
time_series_confirmed_long_daily |>
  filter(Country_Region == "US") |>
  ggplot(aes(x = Date, y = Daily, color = Country_Region)) +
  geom_smooth(method = "gam", se = FALSE) +
  ggtitle("COVID-19 Confirmed Cases")

# Animated Graphs with gganimate ----
library(ggplot2)
library(gganimate)
library(gifski)
theme_set(theme_bw())

# US-only daily counts used by the animation below.
daily_counts <- time_series_confirmed_long_daily |>
  filter(Country_Region == "US")

p <- ggplot(daily_counts, aes(x = Date, y = Daily, color = Country_Region)) +
  geom_point() +
  ggtitle("Confirmed COVID-19 Cases") +
  # gganimate lines
  # Each point is placed in its own group via seq_along(Date).
  geom_point(aes(group = seq_along(Date))) +
  transition_reveal(Date)

# make the animation
# Render once and keep the result. Passing the un-rendered plot `p` to
# anim_save() would render it a second time with default settings, so the
# saved GIF would lose `end_pause = 15`.
daily_counts_animation <- animate(
  p,
  renderer = gifski_renderer(),
  end_pause = 15
)
daily_counts_animation
anim_save("daily_counts_US.gif", animation = daily_counts_animation)

# This download may take about 5 minutes. You only need to do this once.
# Only download when no local copy exists; the file needs fetching just once.
if (!file.exists("data/time_series_covid19_deaths_global.csv")) {
  download.file(
    url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    destfile = "data/time_series_covid19_deaths_global.csv"
  )
}

# Read the wide deaths table and give the slash-containing columns syntactic names.
time_series_deaths_confirmed <- read_csv("data/time_series_covid19_deaths_global.csv") |>
  rename(Province_State = "Province/State", Country_Region = "Country/Region")

# One row per location and date.
# NOTE: the value column keeps the name `Confirmed`, but it holds death counts.
time_series_deaths_long <- time_series_deaths_confirmed |>
  pivot_longer(
    -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Confirmed"
  )
time_series_deaths_long$Date <- mdy(time_series_deaths_long$Date)

# Animated country comparison: one point per country, moving through time.
p <- time_series_deaths_long |>
  filter(
    Country_Region %in% c(
      "US", "Canada", "Mexico", "Brazil", "Egypt", "Ecuador", "India",
      "Netherlands", "Germany", "China"
    )
  ) |>
  # A country may have several Province_State rows per date; sum them so each
  # country contributes a single national total per frame.
  group_by(Country_Region, Date) |>
  summarise(Confirmed = sum(Confirmed), .groups = "drop") |>
  ggplot(aes(x = Country_Region, y = Confirmed, color = Country_Region)) +
  geom_point(aes(size = Confirmed)) +
  transition_time(Date) +
  # Title the size legend "Deaths" rather than the column name `Confirmed`.
  labs(title = "Cumulative Deaths: {frame_time}", size = "Deaths") +
  ylab("Deaths") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# make the animation
animate(p, renderer = gifski_renderer(), end_pause = 15)

# Exercises ----
Exercise 1
# Compute rate per 10,000
# Adds a `rate` column: cases divided by population, scaled to 10,000 people.
mutate(table1, rate = cases / population * 10000)
# A tibble: 6 × 5
country year cases population rate
<chr> <dbl> <dbl> <dbl> <dbl>
1 Afghanistan 1999 745 19987071 0.373
2 Afghanistan 2000 2666 20595360 1.29
3 Brazil 1999 37737 172006362 2.19
4 Brazil 2000 80488 174504898 4.61
5 China 1999 212258 1272915272 1.67
6 China 2000 213766 1280428583 1.67
# Compute total cases per year
# A weighted count sums `cases` within each year, giving one row per year.
count(table1, year, wt = cases, name = "total_cases")
# A tibble: 2 × 2
year total_cases
<dbl> <dbl>
1 1999 250740
2 2000 296920
# Visualize changes over time
# One grey line per country joins its yearly values; points are coloured and
# shaped by country.
ggplot(table1, aes(x = year, y = cases)) +
  geom_line(aes(group = country), color = "grey50") +
  geom_point(aes(color = country, shape = country)) +
  scale_x_continuous(breaks = c(1999, 2000)) # x-axis breaks at 1999 and 2000

# 5.2.1 Exercises ----
1.
1: observation - rate per 10000 people
2: observation - total cases per year
3: observations - case changes from 1999 to 2000 in Afghanistan, Brazil, and China
country = name of the country
year = year of the observation
cases= number of TB cases reported
population = total population of the country that year
rate = TB cases per 10,000 people (cases/population * 10000)
2. Sketching the Rate Calculation for table2 and table3
a. Extract TB cases per country-year
- From `table2` and `table3`, reshape them to long format: one row per country-year
b. Join the two tables
- Match on `country` and `year` to align cases and population
c. Calculate rate
- Use `mutate(rate = cases / population * 10000)`
d. Store the result
- Either as a new column in the joined table or as a new tibble
5.3.1 Data in column names
billboard# A tibble: 317 × 79
artist track date.entered wk1 wk2 wk3 wk4 wk5 wk6 wk7 wk8
<chr> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2 Pac Baby… 2000-02-26 87 82 72 77 87 94 99 NA
2 2Ge+her The … 2000-09-02 91 87 92 NA NA NA NA NA
3 3 Doors D… Kryp… 2000-04-08 81 70 68 67 66 57 54 53
4 3 Doors D… Loser 2000-10-21 76 76 72 69 67 65 55 59
5 504 Boyz Wobb… 2000-04-15 57 34 25 17 17 31 36 49
6 98^0 Give… 2000-08-19 51 39 34 26 26 19 2 2
7 A*Teens Danc… 2000-07-08 97 97 96 95 100 NA NA NA
8 Aaliyah I Do… 2000-01-29 84 62 51 41 38 35 35 38
9 Aaliyah Try … 2000-03-18 59 53 38 28 21 18 16 14
10 Adams, Yo… Open… 2000-08-26 76 76 74 69 68 67 61 58
# ℹ 307 more rows
# ℹ 68 more variables: wk9 <dbl>, wk10 <dbl>, wk11 <dbl>, wk12 <dbl>,
# wk13 <dbl>, wk14 <dbl>, wk15 <dbl>, wk16 <dbl>, wk17 <dbl>, wk18 <dbl>,
# wk19 <dbl>, wk20 <dbl>, wk21 <dbl>, wk22 <dbl>, wk23 <dbl>, wk24 <dbl>,
# wk25 <dbl>, wk26 <dbl>, wk27 <dbl>, wk28 <dbl>, wk29 <dbl>, wk30 <dbl>,
# wk31 <dbl>, wk32 <dbl>, wk33 <dbl>, wk34 <dbl>, wk35 <dbl>, wk36 <dbl>,
# wk37 <dbl>, wk38 <dbl>, wk39 <dbl>, wk40 <dbl>, wk41 <dbl>, wk42 <dbl>, …
# Stack every column whose name starts with "wk" into week/rank pairs.
pivot_longer(
  billboard,
  cols = starts_with("wk"),
  names_to = "week",
  values_to = "rank"
)
# A tibble: 24,092 × 5
artist track date.entered week rank
<chr> <chr> <date> <chr> <dbl>
1 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk1 87
2 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk2 82
3 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk3 72
4 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk4 77
5 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk5 87
6 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk6 94
7 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk7 99
8 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk8 NA
9 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk9 NA
10 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk10 NA
# ℹ 24,082 more rows
# Same reshape as before, but rows whose rank is NA are dropped.
pivot_longer(
  billboard,
  cols = starts_with("wk"),
  names_to = "week",
  values_to = "rank",
  values_drop_na = TRUE
)
# A tibble: 5,307 × 5
artist track date.entered week rank
<chr> <chr> <date> <chr> <dbl>
1 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk1 87
2 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk2 82
3 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk3 72
4 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk4 77
5 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk5 87
6 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk6 94
7 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk7 99
8 2Ge+her The Hardest Part Of ... 2000-09-02 wk1 91
9 2Ge+her The Hardest Part Of ... 2000-09-02 wk2 87
10 2Ge+her The Hardest Part Of ... 2000-09-02 wk3 92
# ℹ 5,297 more rows
# Long billboard data with `week` parsed from its "wk<N>" label into a number.
billboard_longer <- billboard |>
  pivot_longer(
    cols = starts_with("wk"),
    names_to = "week",
    values_to = "rank",
    values_drop_na = TRUE
  ) |>
  mutate(
    week = parse_number(week)
  )

# One faint line per track; the y axis is reversed so rank 1 is drawn at the top.
billboard_longer |>
  ggplot(aes(x = week, y = rank, group = track)) +
  geom_line(alpha = 0.25) +
  scale_y_reverse()

# 5.3.2 How does pivoting work? ----
# Small wide example: one row per id, two blood-pressure measurement columns.
df <- tribble(
  ~id, ~bp1, ~bp2,
  "A", 100, 120,
  "B", 140, 115,
  "C", 120, 125
)

# Lengthen it: column names go to `measurement`, cell values go to `value`.
df |>
  pivot_longer(
    cols = bp1:bp2,
    names_to = "measurement",
    values_to = "value"
  )
# A tibble: 6 × 3
id measurement value
<chr> <chr> <dbl>
1 A bp1 100
2 A bp2 120
3 B bp1 140
4 B bp2 115
5 C bp1 120
6 C bp2 125
5.3.3 Many variables in column names
who2# A tibble: 7,240 × 58
country year sp_m_014 sp_m_1524 sp_m_2534 sp_m_3544 sp_m_4554 sp_m_5564
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Afghanistan 1980 NA NA NA NA NA NA
2 Afghanistan 1981 NA NA NA NA NA NA
3 Afghanistan 1982 NA NA NA NA NA NA
4 Afghanistan 1983 NA NA NA NA NA NA
5 Afghanistan 1984 NA NA NA NA NA NA
6 Afghanistan 1985 NA NA NA NA NA NA
7 Afghanistan 1986 NA NA NA NA NA NA
8 Afghanistan 1987 NA NA NA NA NA NA
9 Afghanistan 1988 NA NA NA NA NA NA
10 Afghanistan 1989 NA NA NA NA NA NA
# ℹ 7,230 more rows
# ℹ 50 more variables: sp_m_65 <dbl>, sp_f_014 <dbl>, sp_f_1524 <dbl>,
# sp_f_2534 <dbl>, sp_f_3544 <dbl>, sp_f_4554 <dbl>, sp_f_5564 <dbl>,
# sp_f_65 <dbl>, sn_m_014 <dbl>, sn_m_1524 <dbl>, sn_m_2534 <dbl>,
# sn_m_3544 <dbl>, sn_m_4554 <dbl>, sn_m_5564 <dbl>, sn_m_65 <dbl>,
# sn_f_014 <dbl>, sn_f_1524 <dbl>, sn_f_2534 <dbl>, sn_f_3544 <dbl>,
# sn_f_4554 <dbl>, sn_f_5564 <dbl>, sn_f_65 <dbl>, ep_m_014 <dbl>, …
# Column names such as sp_m_014 are split on "_" into three variables
# (diagnosis, gender, age); the cell values become `count`.
pivot_longer(
  who2,
  cols = !(country:year),
  names_to = c("diagnosis", "gender", "age"),
  names_sep = "_",
  values_to = "count"
)
# A tibble: 405,440 × 6
country year diagnosis gender age count
<chr> <dbl> <chr> <chr> <chr> <dbl>
1 Afghanistan 1980 sp m 014 NA
2 Afghanistan 1980 sp m 1524 NA
3 Afghanistan 1980 sp m 2534 NA
4 Afghanistan 1980 sp m 3544 NA
5 Afghanistan 1980 sp m 4554 NA
6 Afghanistan 1980 sp m 5564 NA
7 Afghanistan 1980 sp m 65 NA
8 Afghanistan 1980 sp f 014 NA
9 Afghanistan 1980 sp f 1524 NA
10 Afghanistan 1980 sp f 2534 NA
# ℹ 405,430 more rows
5.3.4 Data and variable names in the column headers
household# A tibble: 5 × 5
family dob_child1 dob_child2 name_child1 name_child2
<int> <date> <date> <chr> <chr>
1 1 1998-11-26 2000-01-29 Susan Jose
2 2 1996-06-22 NA Mark <NA>
3 3 2002-07-11 2004-04-05 Sam Seth
4 4 2004-10-10 2009-08-27 Craig Khai
5 5 2000-12-05 2005-02-28 Parker Gracie
# Names like dob_child1 are split on "_": the first piece (".value") names the
# output column (dob / name) and the second piece is stored in `child`.
pivot_longer(
  household,
  cols = !family,
  names_to = c(".value", "child"),
  names_sep = "_",
  values_drop_na = TRUE
)
# A tibble: 9 × 4
family child dob name
<int> <chr> <date> <chr>
1 1 child1 1998-11-26 Susan
2 1 child2 2000-01-29 Jose
3 2 child1 1996-06-22 Mark
4 3 child1 2002-07-11 Sam
5 3 child2 2004-04-05 Seth
6 4 child1 2004-10-10 Craig
7 4 child2 2009-08-27 Khai
8 5 child1 2000-12-05 Parker
9 5 child2 2005-02-28 Gracie
5.4 Widening data
cms_patient_experience# A tibble: 500 × 5
org_pac_id org_nm measure_cd measure_title prf_rate
<chr> <chr> <chr> <chr> <dbl>
1 0446157747 USC CARE MEDICAL GROUP INC CAHPS_GRP… CAHPS for MI… 63
2 0446157747 USC CARE MEDICAL GROUP INC CAHPS_GRP… CAHPS for MI… 87
3 0446157747 USC CARE MEDICAL GROUP INC CAHPS_GRP… CAHPS for MI… 86
4 0446157747 USC CARE MEDICAL GROUP INC CAHPS_GRP… CAHPS for MI… 57
5 0446157747 USC CARE MEDICAL GROUP INC CAHPS_GRP… CAHPS for MI… 85
6 0446157747 USC CARE MEDICAL GROUP INC CAHPS_GRP… CAHPS for MI… 24
7 0446162697 ASSOCIATION OF UNIVERSITY PHYSI… CAHPS_GRP… CAHPS for MI… 59
8 0446162697 ASSOCIATION OF UNIVERSITY PHYSI… CAHPS_GRP… CAHPS for MI… 85
9 0446162697 ASSOCIATION OF UNIVERSITY PHYSI… CAHPS_GRP… CAHPS for MI… 83
10 0446162697 ASSOCIATION OF UNIVERSITY PHYSI… CAHPS_GRP… CAHPS for MI… 63
# ℹ 490 more rows
# List each unique measure code together with its title.
distinct(cms_patient_experience, measure_cd, measure_title)
# A tibble: 6 × 2
measure_cd measure_title
<chr> <chr>
1 CAHPS_GRP_1 CAHPS for MIPS SSM: Getting Timely Care, Appointments, and Infor…
2 CAHPS_GRP_2 CAHPS for MIPS SSM: How Well Providers Communicate
3 CAHPS_GRP_3 CAHPS for MIPS SSM: Patient's Rating of Provider
4 CAHPS_GRP_5 CAHPS for MIPS SSM: Health Promotion and Education
5 CAHPS_GRP_8 CAHPS for MIPS SSM: Courteous and Helpful Office Staff
6 CAHPS_GRP_12 CAHPS for MIPS SSM: Stewardship of Patient Resources
# Widen with one column per measure code. No id_cols are given, so every
# remaining column (including measure_title) identifies a row.
pivot_wider(
  cms_patient_experience,
  names_from = measure_cd,
  values_from = prf_rate
)
# A tibble: 500 × 9
org_pac_id org_nm measure_title CAHPS_GRP_1 CAHPS_GRP_2 CAHPS_GRP_3
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 0446157747 USC CARE MEDICA… CAHPS for MI… 63 NA NA
2 0446157747 USC CARE MEDICA… CAHPS for MI… NA 87 NA
3 0446157747 USC CARE MEDICA… CAHPS for MI… NA NA 86
4 0446157747 USC CARE MEDICA… CAHPS for MI… NA NA NA
5 0446157747 USC CARE MEDICA… CAHPS for MI… NA NA NA
6 0446157747 USC CARE MEDICA… CAHPS for MI… NA NA NA
7 0446162697 ASSOCIATION OF … CAHPS for MI… 59 NA NA
8 0446162697 ASSOCIATION OF … CAHPS for MI… NA 85 NA
9 0446162697 ASSOCIATION OF … CAHPS for MI… NA NA 83
10 0446162697 ASSOCIATION OF … CAHPS for MI… NA NA NA
# ℹ 490 more rows
# ℹ 3 more variables: CAHPS_GRP_5 <dbl>, CAHPS_GRP_8 <dbl>, CAHPS_GRP_12 <dbl>
# Widen again, identifying rows only by the columns whose names start with "org".
pivot_wider(
  cms_patient_experience,
  id_cols = starts_with("org"),
  names_from = measure_cd,
  values_from = prf_rate
)
# A tibble: 95 × 8
org_pac_id org_nm CAHPS_GRP_1 CAHPS_GRP_2 CAHPS_GRP_3 CAHPS_GRP_5 CAHPS_GRP_8
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0446157747 USC C… 63 87 86 57 85
2 0446162697 ASSOC… 59 85 83 63 88
3 0547164295 BEAVE… 49 NA 75 44 73
4 0749333730 CAPE … 67 84 85 65 82
5 0840104360 ALLIA… 66 87 87 64 87
6 0840109864 REX H… 73 87 84 67 91
7 0840513552 SCL H… 58 83 76 58 78
8 0941545784 GRITM… 46 86 81 54 NA
9 1052612785 COMMU… 65 84 80 58 87
10 1254237779 OUR L… 61 NA NA 65 NA
# ℹ 85 more rows
# ℹ 1 more variable: CAHPS_GRP_12 <dbl>
5.4.1 How does pivot_wider() work?
# Small long example: id "B" has no bp3 measurement.
df <- tribble(
  ~id, ~measurement, ~value,
  "A", "bp1", 100,
  "B", "bp1", 140,
  "B", "bp2", 115,
  "A", "bp2", 120,
  "A", "bp3", 105
)

# Widen it: one column per distinct measurement, one row per id.
df |>
  pivot_wider(
    names_from = measurement,
    values_from = value
  )
# A tibble: 2 × 4
id bp1 bp2 bp3
<chr> <dbl> <dbl> <dbl>
1 A 100 120 105
2 B 140 115 NA
# The distinct measurement values, returned as a plain vector.
df |>
  distinct(measurement) |>
  pull()
# [1] "bp1" "bp2" "bp3"
# Drop the name and value columns, then keep the unique remaining rows.
distinct(select(df, -measurement, -value))
# A tibble: 2 × 1
id
<chr>
1 A
2 B
# Unique id rows with three placeholder columns filled with NA.
unique_ids <- distinct(select(df, -measurement, -value))
mutate(unique_ids, x = NA, y = NA, z = NA)
# A tibble: 2 × 4
id x y z
<chr> <lgl> <lgl> <lgl>
1 A NA NA NA
2 B NA NA NA
# Long example where id "A" has two bp1 values.
df <- tribble(
  ~id, ~measurement, ~value,
  "A", "bp1", 100,
  "A", "bp1", 102,
  "A", "bp2", 120,
  "B", "bp1", 140,
  "B", "bp2", 115
)

# Widening with a duplicated id/measurement pair produces list-columns.
df |>
  pivot_wider(
    names_from = measurement,
    values_from = value
  )
# A tibble: 2 × 3
id bp1 bp2
<chr> <list> <list>
1 A <dbl [2]> <dbl [1]>
2 B <dbl [1]> <dbl [1]>
# Count rows per id/measurement pair and keep the pairs that occur more than once.
df |>
  count(id, measurement) |>
  filter(n > 1)
# A tibble: 1 × 3
id measurement n
<chr> <chr> <int>
1 A bp1 2
Exercise 2
# One facet per country needs multi-country data; `daily_counts` holds only the
# US rows and would give a single panel. Use the same five countries as the
# multi-country plot earlier in this file.
time_series_confirmed_long_daily |>
  filter(
    Country_Region %in% c("China", "France", "Italy", "Korea, South", "US")
  ) |>
  ggplot(aes(x = Date, y = Daily)) +
  geom_line(color = "steelblue") +
  ggtitle("Daily Confirmed COVID-19 Cases by Country") +
  # Each panel gets its own y scale.
  facet_wrap(~ Country_Region, scales = "free_y") +
  theme_minimal()

# Exercise 3 ----
# Countries to compare in the daily-cases plot.
selected_countries <- c("US", "India", "Brazil", "Germany", "South Africa")

daily_counts_subset <- time_series_confirmed_long_daily |>
  filter(Country_Region %in% selected_countries)

# `linewidth` sets line thickness; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(daily_counts_subset, aes(x = Date, y = Daily, color = Country_Region)) +
  geom_line(linewidth = 0.6, alpha = 0.8) +
  labs(
    title = "Daily Confirmed COVID-19 Cases",
    subtitle = "Selected Countries: US, India, Brazil, Germany, South Africa",
    x = "Date",
    y = "Daily Cases",
    color = "Country"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 10)
  )

# Exercise 4 ----
# Download the deaths file only if no local copy exists yet.
if (!file.exists("data/time_series_covid19_deaths_global.csv")) {
  download.file(
    url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    destfile = "data/time_series_covid19_deaths_global.csv"
  )
}

deaths_raw <- read_csv("data/time_series_covid19_deaths_global.csv")

# Country-level deaths per date.
# Pivot every column except the four identifier columns. Selecting with
# starts_with("1") | starts_with("2") keeps only months 1, 2, 10, 11 and 12 of
# the m/d/y headers and silently drops every March-September date.
deaths_long <- deaths_raw |>
  pivot_longer(
    cols = -c(`Province/State`, `Country/Region`, Lat, Long),
    names_to = "Date",
    values_to = "Deaths"
  ) |>
  mutate(Date = as.Date(Date, format = "%m/%d/%y")) |>
  group_by(`Country/Region`, Date) |>
  summarise(Deaths = sum(Deaths), .groups = "drop") |>
  rename(Country_Region = `Country/Region`)

selected_countries <- c("US", "Canada", "Mexico")

deaths_subset <- deaths_long |>
  filter(Country_Region %in% selected_countries)

# `linewidth` sets line thickness; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(deaths_subset, aes(x = Date, y = Deaths, color = Country_Region)) +
  geom_line(linewidth = 0.8) +
  labs(
    title = "Cumulative COVID-19 Deaths",
    subtitle = "US, Canada, and Mexico",
    x = "Date",
    y = "Total Deaths",
    color = "Country"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 10)
  )

# Exercise 5 ----
deaths_raw <- read_csv("data/time_series_covid19_deaths_global.csv")

# Country-level deaths per date, ordered so lag() sees consecutive days.
# Pivot every column except the four identifier columns. Selecting with
# starts_with("1") | starts_with("2") keeps only months 1, 2, 10, 11 and 12 of
# the m/d/y headers and silently drops every March-September date.
deaths_long <- deaths_raw |>
  pivot_longer(
    cols = -c(`Province/State`, `Country/Region`, Lat, Long),
    names_to = "Date",
    values_to = "Deaths"
  ) |>
  mutate(Date = as.Date(Date, format = "%m/%d/%y")) |>
  group_by(`Country/Region`, Date) |>
  summarise(Deaths = sum(Deaths), .groups = "drop") |>
  rename(Country_Region = `Country/Region`) |>
  arrange(Country_Region, Date)

# Daily deaths = change in the country total from the previous date, computed
# within each country; the first date of each country gets 0.
deaths_daily <- deaths_long |>
  group_by(Country_Region) |>
  mutate(Daily_Deaths = Deaths - lag(Deaths, default = first(Deaths))) |>
  ungroup()

selected_countries <- c("US", "India", "Brazil", "Germany", "South Africa")

deaths_subset <- deaths_daily |>
  filter(Country_Region %in% selected_countries)

# `linewidth` sets line thickness; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(deaths_subset, aes(x = Date, y = Daily_Deaths, color = Country_Region)) +
  geom_line(linewidth = 0.7, alpha = 0.85) +
  labs(
    title = "Daily COVID-19 Deaths",
    subtitle = "US, India, Brazil, Germany, South Africa",
    x = "Date",
    y = "Deaths per Day",
    color = "Country"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 10)
  )

# Exercise 6 ----
library(ggplot2)
library(gganimate)

# US-only daily counts for the animated bar chart.
us_daily <- time_series_confirmed_long_daily |>
  filter(Country_Region == "US")

p <- ggplot(us_daily, aes(x = Date, y = Daily)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Daily Confirmed COVID-19 Cases in the US",
    x = "Date",
    y = "Cases per Day"
  ) +
  # Fix the visible y range for every frame.
  coord_cartesian(ylim = c(0, 1000000)) +
  theme_minimal() +
  transition_time(Date) +
  # Keep bars from earlier frames on screen; do not show later ones.
  shadow_mark(past = TRUE, future = FALSE)

# Render once and keep the result. Passing the un-rendered plot `p` to
# anim_save() would render it a second time with default settings, so the
# saved GIF would lose `end_pause = 15`.
us_daily_animation <- animate(
  p,
  renderer = gifski_renderer(),
  end_pause = 15
)
us_daily_animation
anim_save("daily_confirmed_US.gif", animation = us_daily_animation)